import htmllib
import formatter
import string
import urllib, urlparse

class myParser(htmllib.HTMLParser):

    def __init__(self, base):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.anchors = []
        self.base = base

    def anchor_bgn(self, href, name, type):
        self.save_bgn()
        if self.base:
            self.anchor = urlparse.urljoin(self.base, href)
        else:
            self.anchor = href

    def anchor_end(self):
        text = string.strip(self.save_end())
        if self.anchor and text:
            self.anchors.append((self.anchor, text))

if __name__ == '__main__':
    # Script entry point: extract every link from the document at URL and
    # append one "url|text" line per link to links.txt.

    URL = "rss_1.xml"

    # Read the whole document, making sure the handle is released even
    # when the read fails.
    f = urllib.urlopen(URL)
    try:
        data = f.read()
    finally:
        f.close()

    # URL doubles as the base against which hrefs are resolved.
    p = myParser(URL)
    p.feed(data)
    p.close()

    # Append mode: results of earlier runs are kept.
    of = open('links.txt', 'a')
    try:
        for a in p.anchors:
            of.write("%s|%s\n" % (a[0], a[1]))
    finally:
        of.close()
